5.2 Main Analysis
- Alpha Diversity Plot - A.
Raw alpha-diversity, i.e. without rarefying
comparing alpha diversity based on raw data has the huge problem of ignoring sample size differences. Usually there is a trend that more reads correlate with higher diversity.
Here, therefore I checked, whether there are significant sample size differences between the groups.
We further look at the residuals of a linear fit alpha diversity vs sample size to correct for this confounder.
Check whether sample_sums/sample sizes/library sizes differ between groups
sample size adjustment has no influence on richness, so sample_sizes are compared for raw counts
AlphaDiversity Main Analysis.
Alpha Diversity = BoxPlot
AlphaDiversity Main Analysis.
#https://grunwaldlab.github.io/analysis_of_microbiome_community_data_in_r/07--diversity_stats.html
anova_result <- aov(df2$Observed ~ Transect * Ecotype, data = df2)
summary(aov(df2$Observed ~ Transect * Ecotype, data = df2))## Df Sum Sq Mean Sq F value Pr(>F)
## Transect 2 1715 858 0.733 0.488718
## Ecotype 4 151580 37895 32.400 0.00000000017 ***
## Transect:Ecotype 8 51828 6478 5.539 0.000243 ***
## Residuals 30 35087 1170
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aresult <- HSD.test(anova_result, "Ecotype", group = TRUE)Plot by mean
AlphaDiversity Main Analysis.
- Beta Diversity Plot - A.
gpca <- ordinate(Plots, "MDS")
# Scree plot
plot_scree(gpca, "Scree Plot for MDS Analysis")#Remove ASVs that do not show appear more than 5 times in more than half the samples
genefilter = genefilter_sample(P_Plots, filterfun_sample(function(x) x > 5), A=0.5*nsamples(P_Plots))
genefilter_tax = prune_taxa(genefilter, P_Plots)
#Transform to even sampling depth.
genefilter_tax_t = transform_sample_counts(genefilter_tax, function(x) 1E6 * x/sum(x))
genefilter_tax_t.ord <- ordinate(genefilter_tax_t, "NMDS")## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1177618
## Run 1 stress 0.1177616
## ... New best solution
## ... Procrustes: rmse 0.0001617719 max resid 0.0009389746
## ... Similar to previous best
## Run 2 stress 0.1177617
## ... Procrustes: rmse 0.000219552 max resid 0.001280429
## ... Similar to previous best
## Run 3 stress 0.1345524
## Run 4 stress 0.1177616
## ... Procrustes: rmse 0.000008377792 max resid 0.00003453982
## ... Similar to previous best
## Run 5 stress 0.136347
## Run 6 stress 0.1177616
## ... New best solution
## ... Procrustes: rmse 0.00005481706 max resid 0.0003228071
## ... Similar to previous best
## Run 7 stress 0.117762
## ... Procrustes: rmse 0.0002667922 max resid 0.001550909
## ... Similar to previous best
## Run 8 stress 0.1492259
## Run 9 stress 0.1177616
## ... New best solution
## ... Procrustes: rmse 0.000007245489 max resid 0.00002199715
## ... Similar to previous best
## Run 10 stress 0.1161722
## ... New best solution
## ... Procrustes: rmse 0.04932887 max resid 0.2692485
## Run 11 stress 0.1177617
## Run 12 stress 0.1403949
## Run 13 stress 0.1177616
## Run 14 stress 0.1177616
## Run 15 stress 0.1494246
## Run 16 stress 0.1151887
## ... New best solution
## ... Procrustes: rmse 0.01123477 max resid 0.06391121
## Run 17 stress 0.1177616
## Run 18 stress 0.1177616
## Run 19 stress 0.1177616
## Run 20 stress 0.1509306
## *** No convergence -- monoMDS stopping criteria:
## 18: stress ratio > sratmax
## 2: scale factor of the gradient < sfgrmin
#
genefilter_tax_t@sam_data$Transect <- factor(genefilter_tax_t@sam_data$Transect, levels = c(1,2,3), labels = c("1","2", "3"))
genefilter_tax_t@sam_data$Ecotype <- factor(genefilter_tax_t@sam_data$Ecotype, levels = c("Meadow","Alder zone","Spruce-Alder", "Spruce","Pine"), labels = c("Meadow","Alder","Spruce-Alder", "Spruce","Pine"))
#
ord_DataFrame_tx <- plot_ordination(genefilter_tax_t, genefilter_tax_t.ord, "samples", justDF ="TRUE")
#
ordplot_tx <- (ordplot <- plot_ordination(genefilter_tax_t, genefilter_tax_t.ord, "samples", color="Ecotype", shape = "Transect") +
geom_hline(yintercept = 0, linetype = 'dashed', alpha = 0.3) +
geom_vline(xintercept = 0, linetype = 'dashed', alpha = 0.3) +
geom_point(size = 5, color = "grey") +
scale_shape_manual(values=c(23,21,24)) +
theme_pubr(border = TRUE) +
coord_fixed(ratio = 1) +
theme(axis.text=element_text(size=14),
axis.text.x = element_text(size = 12, hjust = 0.5),
axis.title.y = element_text(size = 18),
legend.text=element_text(size=14),
legend.title=element_text(size=0),
legend.position="bottom",
axis.title.x = element_text(size = 18),
strip.text.x = element_text(size = 20, face = "bold"))) +
scale_color_manual(values = cols_Ecotype)#+
ordplot_tx$layers <- ordplot_tx$layers[-1]
ordplot_tx +
geom_point(data=ord_DataFrame_tx, aes(x = NMDS1, y = NMDS2, fill =Ecotype, size = 4)) +
scale_fill_manual(values = cols_Ecotype) pord2 = (ordplot <- plot_ordination(genefilter_tax_t, genefilter_tax_t.ord, "samples", color="Ecotype", shape = "Transect") +
geom_hline(yintercept = 0, linetype = 'dashed', alpha = 0.3) +
geom_vline(xintercept = 0, linetype = 'dashed', alpha = 0.3) +
geom_point(size = 5, color = "grey") +
scale_shape_manual(values=c(23,21,24)) +
geom_polygon(aes(fill=Ecotype, alpha = 0.8)) +
theme_pubr(border = TRUE) +
coord_fixed(ratio = 1) +
theme(axis.text=element_text(size=14),
axis.text.x = element_text(size = 12, hjust = 0.5),
axis.title.y = element_text(size = 18),
legend.text=element_text(size=14),
legend.title=element_text(size=0),
legend.position="bottom",
axis.title.x = element_text(size = 18),
strip.text.x = element_text(size = 20, face = "bold"))) +
scale_color_manual(values = cols_Ecotype)
pord2$layers <- pord2$layers[-1]
pord2 +
geom_point(data=ord_DataFrame_tx, aes(x = NMDS1, y = NMDS2, fill =Ecotype, size = 4)) +
scale_fill_manual(values = cols_Ecotype) - Beta Diversity Plot - B.
Ecotype
- Permutational analysis of variance
HeatMap
#Heatmap
meta_data_2 <- read.table("/Users/juliana/Documents/NBIS/Projects/6198/Metadado/metadata_copynumber_sampleA.tsv", header=T, row.names=1, check.names=T, sep="\t")
sample_data(P_Plots) <- meta_data_2
P_Plots@sam_data$CopyNumber2 <- as.numeric(P_Plots@sam_data$CopyNumber)
#Remove ASVs that do not show appear more than 3 times in more than 30% the samples
genefilter2 = genefilter_sample(P_Plots, filterfun_sample(function(x) x > 3), A=0.3*nsamples(P_Plots))
genefilter_tax2 = prune_taxa(genefilter2, P_Plots)
#MetaData
metaR <- meta(genefilter_tax2)
# Reorder Transect
metaR$Transect = factor(metaR$Transect, levels = c("1", "2","3")) #D46C4E
metaR_Transect <- c("#C0C0C0","#BC8F8F","#F5DEB3") #D46C4E #43978D
names(metaR_Transect) <- levels(metaR$Transect)
# Reorder Ecotype
metaR$Ecotype <- factor(metaR$Ecotype, levels = c("Meadow","Alder zone","Spruce-Alder", "Spruce","Pine"), labels = c("Meadow","Alder","Spruce-Alder", "Spruce","Pine"))
metaR_Ecotype <- c("#77A515", "#264D59", "#43978D", "#D46C4E", "#2b8cbe")
names(metaR_Ecotype) <- levels(metaR$Ecotype)
# Add to a list, where names match those in factors dataframe
metaR_AnnColour <- list(
Transect = metaR_Transect)
metaR_AnnColour2 <- list(
Ecotype = metaR_Ecotype)
metaR_AnnColourx <- list(
Transect = metaR_Transect,
Ecotype = metaR_Ecotype)
# Check the output
metaR_AnnColour## $Transect
## 1 2 3
## "#C0C0C0" "#BC8F8F" "#F5DEB3"
SampleOrder = order(metaR$Ecotype, metaR$Transect)
meta.factors <- select(metaR, Transect, Ecotype)
metaR_Filter_composi.filt.abs <- P_Plots
#Subset #Multiplicando pela soma das reads, normalizando
metaR_Filter_composi.filt.ab <- transform_sample_counts(genefilter_tax2, function(x)100*x/sum(x))
###
{
for(n in 1:nsamples(metaR_Filter_composi.filt.ab))
otu_table(metaR_Filter_composi.filt.abs)<- otu_table(metaR_Filter_composi.filt.ab)*sample_data(metaR_Filter_composi.filt.ab@sam_data)$CopyNumber [n]
}
abs_plot <- data.frame(otu_table(metaR_Filter_composi.filt.abs))
#Colocando ASV names na OTU table
rownames(abs_plot)## [1] "ASV1" "ASV3" "ASV4" "ASV5" "ASV7" "ASV8" "ASV11" "ASV13"
## [9] "ASV14" "ASV18" "ASV21" "ASV22" "ASV25" "ASV29" "ASV31" "ASV34"
## [17] "ASV37" "ASV39" "ASV46" "ASV48" "ASV52" "ASV55" "ASV56" "ASV57"
## [25] "ASV62" "ASV73" "ASV82" "ASV83" "ASV84" "ASV85" "ASV90" "ASV99"
## [33] "ASV100" "ASV106" "ASV111" "ASV125" "ASV129" "ASV130" "ASV132" "ASV135"
## [41] "ASV138" "ASV140" "ASV153" "ASV155" "ASV162" "ASV163" "ASV170" "ASV174"
## [49] "ASV176" "ASV187" "ASV197" "ASV202" "ASV206" "ASV208" "ASV210" "ASV212"
## [57] "ASV218" "ASV220" "ASV222" "ASV229" "ASV237" "ASV248" "ASV265" "ASV271"
## [65] "ASV273" "ASV278" "ASV290" "ASV292" "ASV293" "ASV295" "ASV299" "ASV311"
## [73] "ASV316" "ASV319" "ASV341" "ASV347" "ASV349" "ASV352" "ASV362" "ASV376"
## [81] "ASV383" "ASV386" "ASV392" "ASV416" "ASV423" "ASV429" "ASV443" "ASV453"
## [89] "ASV467" "ASV479" "ASV546" "ASV572" "ASV597" "ASV610" "ASV633" "ASV778"
## [97] "ASV779" "ASV787"
taxonomy_otu_compositional_copy <- data.frame(tax_table(metaR_Filter_composi.filt.abs))
tax_m <- taxonomy_otu_compositional_copy$taxa_name
rownames(abs_plot) <- tax_m
abs_plot##Plot
plot_Log10_max <- log10(abs_plot)/max(log10(abs_plot) +1)
#plot_Log10_max <- log10(abs_plot +1)
plot_Log10_max[plot_Log10_max == "-Inf"] <- 0
colsHeat<- c("#F7F7F7", "#92C5DE", "#0571B0", "#F4A582", "#CA0020")
pheatmap(plot_Log10_max[, SampleOrder],
cluster_cols = FALSE,
cluster_rows = TRUE,
gaps_row = 5,
clustering_distance_rows = "euclidean",
clustering_distance_cols = "euclidean",
annotation_colors = metaR_AnnColourx, annotation_col = meta.factors,
show_colnames = FALSE,
color = colorRampPalette(c("white", colsHeat))(50),
border_color = "#f8edeb",
display_numbers = FALSE)meta.factorsT <- select(metaR, Transect)
meta.factorsE <- select(metaR, Ecotype)Taxa_DataFrame <- data.frame(tax_table(metaR_Filter_composi.filt.abs))
Taxa_DataFrame_F <- Taxa_DataFrame %>% select(1,2,3,4,5,6,10)
Taxa_Matrix <- as.matrix(Taxa_DataFrame_F)
pseq_plot_heat <- metaR_Filter_composi.filt.abs
rown <- rownames(sample_data(pseq_plot_heat))
sample_data(pseq_plot_heat)$SampleID <- rown
sample_data(pseq_plot_heat) <- (sample_data(pseq_plot_heat)[, c(14,1,2,3,4,5,6,7,8,9,10,11,12,13)])
tax_table(pseq_plot_heat) <- as.matrix(Taxa_Matrix)
count_f <- data.frame(otu_table(pseq_plot_heat))
meta_f <- data.frame(sample_data(pseq_plot_heat))
taxa_f <- data.frame(tax_table(pseq_plot_heat))
names(taxa_f)[7] <- "Species"
d <- amp_load(
otutable = count_f,
metadata = meta_f,
taxonomy = taxa_f
)## Warning: Could not find a column named OTU/ASV in otutable, using rownames as
## OTU ID's
## Warning: Could not find a column named OTU/ASV in taxonomy, using rownames as
## OTU ID's
teste <- amp_heatmap(
d,
group_by = c("Transect"),
facet_by = c("Ecotype"),
normalise = TRUE,
#tax_add = "OTU",
tax_aggregate = "Species",
tax_show = 100,
showRemainingTaxa = FALSE,
tax_class = NULL,
tax_empty = "best",
plot_values = FALSE,
plot_values_size = 3,
plot_legendbreaks = NULL,
plot_colorscale = "log10",
plot_na = TRUE,
measure = "mean",
#sort_by = "Transect 1",
min_abundance = 0.01,
max_abundance = NULL,
normalise_by = NULL,
scale_by = NULL,
color_vector = colorRampPalette(c("white", colsHeat))(50),
round = 1,
textmap = FALSE,
plot_functions = FALSE,
function_data = FALSE,
functions = c("MiDAS", "Filamentous", "AOB", "NOB", "PAO", "GAO"),
rel_widths = c(0.75, 0.25)
)## Warning: The data has already been normalised. Setting normalise = TRUE (the
## default) will normalise the data again and the relative abundance information
## about the original data of which the provided data is a subset will be lost.
## Warning: There are only 98 taxa, showing all
print(teste)## Warning: Transformation introduced infinite values in discrete y-axis
- Relative abundance of the top 20 ASVs